package com.daervin.svc.parser;

import com.daervin.svc.common.utils.DateTimeUtils;
import org.apache.log4j.Logger;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.selector.Selectable;

import java.util.Calendar;
import java.util.Date;
import java.util.List;

/**
 * List-page parser for the Tmtpost news site.
 *
 * <p>Reads the first {@code day_part} section of a crawled list page, checks
 * the date printed in that section, and queues the article links found in it
 * as follow-up crawl requests.
 *
 * @author daervin
 * @version 1.0.0
 * @since 1.0.0
 */
public class TmtpostParser extends RootParser {

    // Bound to this class so log entries are attributed to the right parser.
    private static final Logger LOGGER = Logger.getLogger(TmtpostParser.class);

    /**
     * Creates a parser for the given URL.
     *
     * @param url passed unchanged to the {@link RootParser} constructor
     */
    public TmtpostParser(String url) {
        super(url);
    }

    /**
     * Returns the site configuration inherited from {@link RootParser}.
     *
     * @return whatever {@code super.getSite()} returns
     */
    @Override
    public Site getSite() {
        return super.getSite();
    }

    /**
     * Extracts article links from a list page and adds them to the page's
     * target requests.
     *
     * <p>The method returns without queuing anything when the page does not
     * yield at least two nodes for the {@code day_part} selector, when the
     * section date is missing or cannot be parsed, or when the date is before
     * the value returned by {@code DateTimeUtils.getStartTime} for the current
     * calendar (NOTE(review): presumably the start of today, i.e. only
     * today's section is crawled; confirm against DateTimeUtils).
     *
     * <p>An entry is skipped when its link, title or time text is empty.
     * A failure while handling one entry is logged and does not stop the
     * remaining entries from being processed.
     *
     * @param page the downloaded list page
     */
    @Override
    public void listProcess(Page page) {
        String xpathStr = "//div[@class='day_part']/";
        List<Selectable> newsViewList = page.getHtml().xpath(xpathStr).nodes();
        // The first node is read as the date header and the second as the article list.
        if (CollectionUtils.isEmpty(newsViewList) || newsViewList.size() < 2) {
            return;
        }
        Selectable dayItem = newsViewList.get(0);
        Selectable ulItem = newsViewList.get(1);
        if (dayItem == null || ulItem == null) {
            return;
        }

        String day = dayItem.xpath("//div[@class='date']/time/text()").get();
        if (StringUtils.isEmpty(day)) {
            return;
        }
        // Turn the "yyyy年MM月dd日" style text into "yyyy-MM-dd" before parsing.
        day = day.replace("年", "-").replace("月", "-").replace("日", "").trim();
        Date dayDate = DateTimeUtils.shortParseDate(day);
        if (dayDate == null || dayDate.before(DateTimeUtils.getStartTime(Calendar.getInstance()))) {
            return;
        }

        List<Selectable> newsUlList = ulItem.xpath("//ul/").nodes();
        if (CollectionUtils.isEmpty(newsUlList)) {
            return;
        }
        for (Selectable item : newsUlList) {
            try {
                String title = item.xpath("//h2[@class='w_tit']/a/text()").get();
                String time = item.xpath("//time[@class='source']/text()").get();
                String links = item.xpath("//h2[@class='w_tit']/a").links().get();
                time = time == null ? "" : time.trim();
                // Title and time are only used to validate the entry; the link alone is queued.
                if (StringUtils.isEmpty(links) || StringUtils.isEmpty(title) || StringUtils.isEmpty(time)) {
                    continue;
                }
                Request targetRequest = new Request();
                targetRequest.setUrl(links);
                page.addTargetRequest(targetRequest);
            } catch (Exception e) {
                // Best effort per entry: record the failure and continue with the next one.
                LOGGER.error("TmtpostParser error", e);
            }
        }
    }
}
